The following script was used to produce some of the panels in Figure 3 and Supplemental Figure 5.
# Start from a clean session: drop every object in the current environment
# and close the active graphics device if any device is open.
rm(list = ls())
# dev.list() is NULL when no device is open, otherwise a vector of device numbers.
if (!is.null(dev.list())) {
  dev.off()
}
## null device
## 1
# Emit a form-feed character; consoles that honor it (e.g. RStudio) clear the screen.
cat("\014")
# Fix the random seed so any stochastic step is reproducible between runs.
set.seed(1)
# Run functions.R; the package start-up messages that follow come from this call.
# NOTE(review): helpers used below such as theme_Publication() are presumably defined there - confirm.
source("functions.R")
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.1 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 0.8.5
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
## mixtools package, version 1.2.0, Released 2020-02-05
## This package is based upon work supported by the National Science Foundation under Grant No. SES-0518772.
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colMeans,
## colnames, colSums, dirname, do.call, duplicated, eval, evalq,
## Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply,
## lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int,
## pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, rownames,
## rowSums, sapply, setdiff, sort, table, tapply, union, unique,
## unsplit, which, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:data.table':
##
## first, second
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:base':
##
## expand.grid
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:data.table':
##
## shift
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
##
##
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
##
## space
## The following object is masked from 'package:S4Vectors':
##
## space
## The following object is masked from 'package:stats':
##
## lowess
## ========================================
## circlize version 0.4.9
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggthemes':
##
## theme_map
## Loading required package: XML
## Loading required package: grid
##
## Attaching package: 'grid'
## The following object is masked from 'package:mixtools':
##
## depth
## ========================================
## ComplexHeatmap version 1.20.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://bioconductor.org/packages/ComplexHeatmap/
##
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
## ========================================
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
##
## get_legend
Loading in data used
# Per-gene, per-cell-line scores (columns used below: GENE, Cell_Line, Mod_Z_Score).
avana_res <- read.delim(file = "Data/avana_output_v2.txt")
# Edges determined by dPCC methods; the `name` column is used below to pick heatmap genes.
seed_cluster_correlated <- read.csv(file = "Data/seed_cluster_correlated.csv")
# DepMap cell-line annotation table (IDs, CCLE names, disease subtype, ...).
cell_line_key <- read_csv(file = "Data/DepMap-2018q4-celllines.csv")
## Parsed with column specification:
## cols(
## DepMap_ID = col_character(),
## CCLE_Name = col_character(),
## Aliases = col_character(),
## COSMIC_ID = col_double(),
## `Sanger ID` = col_double(),
## `Primary Disease` = col_character(),
## `Subtype Disease` = col_character(),
## Gender = col_character(),
## Source = col_character()
## )
## Warning: 2 parsing failures.
## row col expected actual file
## 1329 Source delimiter or quote G 'Data/DepMap-2018q4-celllines.csv'
## 1329 NA 9 columns 15 columns 'Data/DepMap-2018q4-celllines.csv'
# Keep only the ID, disease subtype and CCLE name, renaming them in the same
# step to the short names used throughout the rest of the script.
cell_key <- dplyr::select(
  cell_line_key,
  Cell_Line = DepMap_ID,
  Subtype = `Subtype Disease`,
  CCLE = CCLE_Name
)
# The full annotation table is no longer needed.
rm(cell_line_key)
Using 5 genes from the cluster for gene x gene plots
# Wide table of Mod_Z_Score for the five cluster genes (one row per cell line,
# one column per gene), joined to the cell-line annotations.
gene_dat <- avana_res %>%
  filter(GENE %in% c("GPI", "FASN", "GPAT4", "ACACA", "CHP1")) %>%
  dplyr::select(GENE, Cell_Line, Mod_Z_Score) %>%
  spread(GENE, Mod_Z_Score) %>%
  merge(cell_key)
# Two-level label used for coloring: AML lines versus everything else
# (rows with a missing Subtype stay "Other").
gene_dat$Cancer_Type <- "Other"
gene_dat$Cancer_Type[gene_dat$Subtype %in% "Acute Myelogenous Leukemia (AML)"] <- "AML"
#pdf("/export/wflenoir/TSG/paper_figs_code/fig/3-FASN_GPAT4.pdf",height = 10,width=10)
# FASN vs GPAT4 Mod-Z-Scores, one point per cell line, colored by Cancer_Type.
# Each color group gets its own linear fit (no confidence band) and its own
# Pearson correlation label; the legend is hidden.
ggplot(gene_dat, aes(x = FASN, y = GPAT4, color = Cancer_Type)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  stat_cor(aes(color = Cancer_Type), method = "pearson", label.x.npc = 0.5) +
  scale_color_manual(values = c("#f03c4e", "#5c5c5c")) +
  labs(x = "FASN Mod-Z-Score", y = "GPAT4 Mod-Z-Score") +
  theme_Publication() +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
#dev.off()
#pdf("/export/wflenoir/TSG/paper_figs_code/fig/3-FASN_GPI.pdf",height = 10,width=10)
# FASN vs GPI Mod-Z-Scores, one point per cell line, colored by Cancer_Type.
# Each color group gets its own linear fit (no confidence band) and its own
# Pearson correlation label; the legend is hidden.
ggplot(gene_dat, aes(x = FASN, y = GPI, color = Cancer_Type)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  stat_cor(aes(color = Cancer_Type), method = "pearson", label.x.npc = 0.5) +
  scale_color_manual(values = c("#f03c4e", "#5c5c5c")) +
  labs(x = "FASN Mod-Z-Score", y = "GPI Mod-Z-Score") +
  theme_Publication() +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
#dev.off()
#pdf("/export/wflenoir/TSG/paper_figs_code/fig/3-FASN_CHP1.pdf",height = 10,width=10)
# FASN vs CHP1 Mod-Z-Scores, one point per cell line, colored by Cancer_Type.
# Each color group gets its own linear fit (no confidence band) and its own
# Pearson correlation label; the legend is hidden.
ggplot(gene_dat, aes(x = FASN, y = CHP1, color = Cancer_Type)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  stat_cor(aes(color = Cancer_Type), method = "pearson", label.x.npc = 0.5) +
  scale_color_manual(values = c("#f03c4e", "#5c5c5c")) +
  labs(x = "FASN Mod-Z-Score", y = "CHP1 Mod-Z-Score") +
  theme_Publication() +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
#dev.off()
#pdf("/export/wflenoir/TSG/paper_figs_code/fig/3-FASN_ACACA.pdf",height = 10,width=10)
# FASN vs ACACA Mod-Z-Scores, one point per cell line, colored by Cancer_Type.
# Each color group gets its own linear fit (no confidence band) and its own
# Pearson correlation label; the legend is hidden.
ggplot(gene_dat, aes(x = FASN, y = ACACA, color = Cancer_Type)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  stat_cor(aes(color = Cancer_Type), method = "pearson", label.x.npc = 0.5) +
  scale_color_manual(values = c("#f03c4e", "#5c5c5c")) +
  labs(x = "FASN Mod-Z-Score", y = "ACACA Mod-Z-Score") +
  theme_Publication() +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
#dev.off()
#pdf("/export/wflenoir/TSG/paper_figs_code/fig/3-GPI_GPAT4.pdf",height = 10,width=10)
# GPI vs GPAT4 Mod-Z-Scores, one point per cell line, colored by Cancer_Type.
# Each color group gets its own linear fit (no confidence band) and its own
# Pearson correlation label; the legend is hidden.
ggplot(gene_dat, aes(x = GPI, y = GPAT4, color = Cancer_Type)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  stat_cor(aes(color = Cancer_Type), method = "pearson", label.x.npc = 0.5) +
  scale_color_manual(values = c("#f03c4e", "#5c5c5c")) +
  labs(x = "GPI Mod-Z-Score", y = "GPAT4 Mod-Z-Score") +
  theme_Publication() +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
#dev.off()
#pdf("/export/wflenoir/TSG/paper_figs_code/fig/3-GPI_CHP1.pdf",height = 10,width=10)
# GPI vs CHP1 Mod-Z-Scores, one point per cell line, colored by Cancer_Type.
# Each color group gets its own linear fit (no confidence band) and its own
# Pearson correlation label; the legend is hidden.
ggplot(gene_dat, aes(x = GPI, y = CHP1, color = Cancer_Type)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  stat_cor(aes(color = Cancer_Type), method = "pearson", label.x.npc = 0.5) +
  scale_color_manual(values = c("#f03c4e", "#5c5c5c")) +
  labs(x = "GPI Mod-Z-Score", y = "CHP1 Mod-Z-Score") +
  theme_Publication() +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
#dev.off()
#pdf("/export/wflenoir/TSG/paper_figs_code/fig/3-GPI_ACACA.pdf",height = 10,width=10)
# GPI vs ACACA Mod-Z-Scores, one point per cell line, colored by Cancer_Type.
# Each color group gets its own linear fit (no confidence band) and its own
# Pearson correlation label; the legend is hidden.
ggplot(gene_dat, aes(x = GPI, y = ACACA, color = Cancer_Type)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  stat_cor(aes(color = Cancer_Type), method = "pearson", label.x.npc = 0.5) +
  scale_color_manual(values = c("#f03c4e", "#5c5c5c")) +
  labs(x = "GPI Mod-Z-Score", y = "ACACA Mod-Z-Score") +
  theme_Publication() +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
#dev.off()
#pdf("/export/wflenoir/TSG/paper_figs_code/fig/3-GPAT4_ACACA.pdf",height = 10,width=10)
# GPAT4 vs ACACA Mod-Z-Scores, one point per cell line, colored by Cancer_Type.
# Each color group gets its own linear fit (no confidence band) and its own
# Pearson correlation label; the legend is hidden.
ggplot(gene_dat, aes(x = GPAT4, y = ACACA, color = Cancer_Type)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  stat_cor(aes(color = Cancer_Type), method = "pearson", label.x.npc = 0.5) +
  scale_color_manual(values = c("#f03c4e", "#5c5c5c")) +
  labs(x = "GPAT4 Mod-Z-Score", y = "ACACA Mod-Z-Score") +
  theme_Publication() +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
#dev.off()
#pdf("/export/wflenoir/TSG/paper_figs_code/fig/3-GPAT4_CHP1.pdf",height = 10,width=10)
# GPAT4 vs CHP1 Mod-Z-Scores, one point per cell line, colored by Cancer_Type.
# Each color group gets its own linear fit (no confidence band) and its own
# Pearson correlation label; the legend is hidden.
# The y-axis label names CHP1, the gene actually mapped to y (it previously
# read "ACACA Mod-Z-Score", which mislabeled the plotted data).
ggplot(gene_dat, aes(x = GPAT4, y = CHP1, color = Cancer_Type)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  stat_cor(aes(color = Cancer_Type), method = "pearson", label.x.npc = 0.5) +
  scale_color_manual(values = c("#f03c4e", "#5c5c5c")) +
  labs(x = "GPAT4 Mod-Z-Score", y = "CHP1 Mod-Z-Score") +
  theme_Publication() +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
#dev.off()
#pdf("/export/wflenoir/TSG/paper_figs_code/fig/3-CHP1_ACACA.pdf",height = 10,width=10)
# CHP1 vs ACACA Mod-Z-Scores, one point per cell line, colored by Cancer_Type.
# Each color group gets its own linear fit (no confidence band) and its own
# Pearson correlation label; the legend is hidden.
ggplot(gene_dat, aes(x = CHP1, y = ACACA, color = Cancer_Type)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  stat_cor(aes(color = Cancer_Type), method = "pearson", label.x.npc = 0.5) +
  scale_color_manual(values = c("#f03c4e", "#5c5c5c")) +
  labs(x = "CHP1 Mod-Z-Score", y = "ACACA Mod-Z-Score") +
  theme_Publication() +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
#dev.off()
A couple of dPCC examples where the observed signal is bad and needs to be filtered out.
# Wide table of Mod_Z_Score for MYCN and EVPL (one row per cell line),
# joined to the cell-line annotations.
gene_dat <- avana_res %>%
  filter(GENE %in% c("MYCN", "EVPL")) %>%
  dplyr::select(GENE, Cell_Line, Mod_Z_Score) %>%
  spread(GENE, Mod_Z_Score) %>%
  merge(cell_key)
# Two-level label used for coloring: AML lines versus everything else
# (rows with a missing Subtype stay "Other").
gene_dat$Cancer_Type <- "Other"
gene_dat$Cancer_Type[gene_dat$Subtype %in% "Acute Myelogenous Leukemia (AML)"] <- "AML"
# AML-only subset; not referenced again in the visible part of this script.
gene_dat_aml <- filter(gene_dat, Cancer_Type == "AML")
#pdf("/export/wflenoir/TSG/paper_figs_code/fig/3-EVPL-MYCN.pdf",height = 10,width=10)
# EVPL vs MYCN Mod-Z-Scores, one point per cell line, colored by Cancer_Type.
# Each color group gets its own linear fit (no confidence band) and its own
# Pearson correlation label; the legend is hidden.
ggplot(gene_dat, aes(x = EVPL, y = MYCN, color = Cancer_Type)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  stat_cor(aes(color = Cancer_Type), method = "pearson", label.x.npc = 0.5) +
  scale_color_manual(values = c("#f03c4e", "#5c5c5c")) +
  labs(x = "EVPL Mod-Z-Score", y = "MYCN Mod-Z-Score") +
  theme_Publication() +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
#dev.off()
# Wide table of Mod_Z_Score for ATOH8 and KCNK13 (one row per cell line),
# joined to the cell-line annotations.
gene_dat <- avana_res %>%
  filter(GENE %in% c("ATOH8", "KCNK13")) %>%
  dplyr::select(GENE, Cell_Line, Mod_Z_Score) %>%
  spread(GENE, Mod_Z_Score) %>%
  merge(cell_key)
# Two-level label used for coloring: AML lines versus everything else
# (rows with a missing Subtype stay "Other").
gene_dat$Cancer_Type <- "Other"
gene_dat$Cancer_Type[gene_dat$Subtype %in% "Acute Myelogenous Leukemia (AML)"] <- "AML"
#pdf("/export/wflenoir/TSG/paper_figs_code/fig/3-KCNK13-ATOH8.pdf",height = 10,width=10)
# ATOH8 vs KCNK13 Mod-Z-Scores, one point per cell line, colored by Cancer_Type.
# Each color group gets its own linear fit (no confidence band) and its own
# Pearson correlation label; the legend is hidden.
ggplot(gene_dat, aes(x = ATOH8, y = KCNK13, color = Cancer_Type)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, size = 1.5) +
  stat_cor(aes(color = Cancer_Type), method = "pearson", label.x.npc = 0.5) +
  scale_color_manual(values = c("#f03c4e", "#5c5c5c")) +
  labs(x = "ATOH8 Mod-Z-Score", y = "KCNK13 Mod-Z-Score") +
  theme_Publication() +
  theme(legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
#dev.off()
Compiled heatmap of genes selected through dPCC
# Wide Mod_Z_Score table restricted to the genes listed in
# seed_cluster_correlated$name and to AML cell lines only.
AML_Heatmap <- avana_res %>%
  filter(GENE %in% seed_cluster_correlated$name) %>%
  dplyr::select(GENE, Cell_Line, Mod_Z_Score) %>%
  spread(GENE, Mod_Z_Score) %>%
  merge(cell_key) %>%
  filter(Subtype == "Acute Myelogenous Leukemia (AML)")
# Cell_Line-to-CCLE lookup; not referenced again in the visible part of this script.
temp <- dplyr::select(AML_Heatmap, Cell_Line, CCLE)
# Row labels: the part of the CCLE name before the first underscore.
rownames(AML_Heatmap) <- unlist(
  map(strsplit(AML_Heatmap$CCLE, "_", fixed = TRUE), 1)
)
# Drop the annotation columns so only the per-gene score columns remain.
AML_Heatmap <- dplyr::select(AML_Heatmap, -Cell_Line, -Subtype, -CCLE)
# Diverging color scale: -6 -> red, 0 -> white, 6 -> light blue.
col_fun <- colorRamp2(
  c(-6, 0, 6),
  c("#e60505", 'white', "#69cfff")
)
#AML_Heatmap <- AML_Heatmap[match(order(AML_Heatmap$FASN),rownames(AML_Heatmap)),]
#pdf("../paper_figs_code/fig/AML_Heatmap_bipartite.pdf",width = 15,height = 10)
# Draw the heatmap (rows = AML cell lines, columns = genes). Rows are clustered
# on Pearson distance; columns are clustered with the default distance.
Heatmap(
  as.matrix(AML_Heatmap),
  col = col_fun,
  cluster_rows = TRUE,
  clustering_distance_rows = "pearson",
  # clustering_distance_columns = "pearson",
  cluster_columns = TRUE,
  show_heatmap_legend = TRUE,
  row_names_gp = gpar(fontsize = 20)
)
#dev.off()